Importing Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize']=12,8
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import MinMaxScaler,StandardScaler,MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import FunctionTransformer
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import itertools

Removing Duplicate Rows

In [2]:
data=pd.read_excel('Combine data/sectorwise.xlsx')
data=data.drop_duplicates()
In [3]:
data.head()
Out[3]:
Job_Title Company_Name Location Posted_Date Salary Experience Skill Sector
0 Role Designation- Consultant Infosys Technologies ltd Nagpur 2019-12-22 Not disclosed 5-10 yrs Order management, Procurement, Oracle Apps, Su... Transport and logistics
1 Salesforce Developer- Nagpur Persistent Systems Limited. Nagpur 2019-12-23 Not disclosed 1-4 yrs Educational Qualification, Load Balancing, Ape... Sales
2 Consultant Physician & Intensivist Crassula Healthcare Solution Bhopal, Jabalpur, Mumbai Suburbs, Mundra, Nagp... 2019-12-23 Not disclosed 0-5 yrs Medicine, intensive care, critical care, Inter... Healthcare
3 Consultant Anaesthesia- ICU & Critical Care Crassula Healthcare Solution Mumbai, Bhopal, Nagpur, Surat 2019-12-23 Not disclosed 0-5 yrs intensive care, anesthesiologist, md, dnb, icu... Healthcare
4 Product Manager (pharma) L A Consultancy Chennai, Nagpur, Mumbai, Pune, Kochi, Bengaluru 2019-12-23 Not disclosed 5-10 yrs R&D, technical, SPC, product manager, strategy... Healthcare
In [4]:
df1=data[['Job_Title','Company_Name','Posted_Date','Sector']]
df1=df1.drop_duplicates()

df1['Posted_Date']=pd.to_datetime(df1['Posted_Date'])
df=df1.groupby('Posted_Date')['Posted_Date'].count()

df.describe()
Out[4]:
count    110.000000
mean      74.263636
std       51.903523
min        1.000000
25%       34.000000
50%       61.000000
75%      107.000000
max      238.000000
Name: Posted_Date, dtype: float64
In [5]:
sector=df1.groupby('Sector')['Sector'].count()
sector.sort_values(ascending=False)
Out[5]:
Sector
Sales                                     1394
Information Technology                    1206
Retail                                     872
Business Consulting and Management         652
Teacher Training and Education             562
Marketing, Advertising and PR              536
Engineering and Manufacturing              465
Business, consulting and management        338
Healthcare                                 320
Accounting, Finance and Banking            297
Accountancy, banking, and finance jobs     236
Recruitment and HR                         187
Marketing, Advertising, and PR             165
Media and Internet                         102
Media and the Internet                     100
Property and Consultation                   97
Transport and logistics                     88
Marketing Advertising and PR                75
Transport and Logistics                     69
business, consulting and management         63
Hospitality and Event Management            58
Public service and Administration           39
Teacher, Training and Education             36
Accounting , Finance and Banking            34
Law                                         30
Public services and Administration          24
Leisure, Sports and Tourism                 23
public services and Administration          21
Science and Pharmaceuticals                 11
Environment and Agriculture                 10
Law enforcement and security                 9
Trasport and Logistics                       8
Energy and utilities                         4
Leisure, Sports, and Tourism                 4
Science and Pharmaceuticals \n               4
Social care                                  3
Finance                                      1
marketing, Advertising, and PR               1
Information Technology \n                    1
Name: Sector, dtype: int64

Replacing misspelled Sector names with correct one

In [6]:
df1['Sector'].replace(['Information Technology \n'],'Information Technology',inplace=True)
df1['Sector'].replace(['Business, consulting and management','business, consulting and management',''],'Business Consulting and Management',inplace=True)
df1['Sector'].replace(['Marketing, Advertising, and PR','marketing, Advertising, and PR','Marketing, Advertising and PR','Marketing Advertising and PR'],'Marketing, Advertising and PR',inplace=True)
df1['Sector'].replace(['Accountancy, banking, and finance jobs','Finance','Accounting , Finance and Banking'],'Accounting, Finance and Banking',inplace=True)
df1['Sector'].replace(['Public service and Administration','Public services and Administration','public services and Administration'],'Public Service and Administration',inplace=True)
df1['Sector'].replace(['Science and Pharmaceuticals \n'],'Science and Pharmaceuticals',inplace=True)
df1['Sector'].replace(['Leisure, Sports, and Tourism'],'Leisure, Sports and Tourism',inplace=True)
df1['Sector'].replace(['Transport and logistics','Trasport and Logistics'],'Transport and Logistics',inplace=True)
df1['Sector'].replace(['Media and the Internet'],'Media and Internet',inplace=True)
df1['Sector'].replace(['Teacher, Training and Education'],'Teacher Training and Education',inplace=True)
df1['Sector'].replace(['Social care'],'Social Care',inplace=True)
df1['Sector'].replace(['Energy and utilities'],'Energy and Utilities',inplace=True)
df1['Sector'].replace(['Law enforcement and security'],'Law Enforcement and Security',inplace=True)
In [7]:
sector=df1.groupby('Sector')['Sector'].count()
sector.sort_values(ascending=False)
Out[7]:
Sector
Sales                                 1394
Information Technology                1207
Business Consulting and Management    1053
Retail                                 872
Marketing, Advertising and PR          777
Teacher Training and Education         598
Accounting, Finance and Banking        568
Engineering and Manufacturing          465
Healthcare                             320
Media and Internet                     202
Recruitment and HR                     187
Transport and Logistics                165
Property and Consultation               97
Public Service and Administration       84
Hospitality and Event Management        58
Law                                     30
Leisure, Sports and Tourism             27
Science and Pharmaceuticals             15
Environment and Agriculture             10
Law Enforcement and Security             9
Energy and Utilities                     4
Social Care                              3
Name: Sector, dtype: int64

Plotting Sector-wise Time Series data

In [8]:
fig1, ax1 = plt.subplots(figsize=(25, 18))
sns.set(font_scale=3) 
ax1.pie(sector.sort_values(ascending=False)[0:10], labels=sector.sort_values(ascending=False).index[0:10], autopct='%1.0f%%', startangle=90,textprops={'fontsize': 25})
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
ax1.set_title('Top 10 Sectors\n',fontweight='bold')
plt.show()
plt.tight_layout()
<Figure size 432x288 with 0 Axes>
In [9]:
sec=sector.sort_values(ascending=False)
fig_dims = (25, 15)
sns.set(font_scale=3) 
fig, ax = plt.subplots(figsize=fig_dims)
sns.barplot(x=sec.values,y=sec.index,ax=ax)
plt.ylabel('Sector',fontweight='bold')
ax.set_title('Sector-wise Job Postings\n',fontweight='bold')
Out[9]:
Text(0.5,1,'Sector-wise Job Postings\n')
In [131]:
fig, ax1 = plt.subplots(len(sec.index),figsize=(15,50))
#sns.set(font_scale=1) 
plot=0
print('Sector-wise Job Postings')
print('__________________________________________________________________________________________________________________')
for i in sec.index:
    it=df1[df1['Sector']==i]
    
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    sns.set(font_scale=1) 
    ax1[plot].plot(d_it,label=i)  
    ax1[plot].legend()
    plot=plot+1  
Sector-wise Job Postings
__________________________________________________________________________________________________________________

Checking Stationarity in time series

  • Determing rolling statistics
  • Perform Dickey-Fuller test
In [129]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    
    #Determing rolling statistics
    rolmean=timeseries.rolling(window=10).mean()
    rolstd=timeseries.rolling(window=10).std()

    #Plot rolling statistics:
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print ('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
In [12]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    test_stationarity(d_it)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.358995e+00
p-value                        2.501454e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.455822e+00
p-value                        1.484382e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.549097e+00
p-value                        3.224296e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.920508
p-value                          0.001889
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -5.852893e+00
p-value                        3.555029e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.778959e+00
p-value                        6.814323e-17
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.837813
p-value                         0.000046
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -8.544093e+00
p-value                        9.607190e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -8.476088e+00
p-value                        1.434169e-13
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -7.826999e+00
p-value                        6.441844e-12
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.425173e+00
p-value                        6.577578e-11
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.172061
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.044954
p-value                         0.267216
#Lags Used                      5.000000
Number of Observations Used    24.000000
Critical Value (1%)            -3.737709
Critical Value (5%)            -2.992216
Critical Value (10%)           -2.635747
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.225241
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.169368
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

Data Transformation

1. Log Transformation

In [13]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_log=np.log(d_it)
    #demand_log.dropna(inplace=True)
    test_stationarity(demand_log)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.391097e+00
p-value                        2.104998e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.908134e+00
p-value                        2.678230e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.061974e+00
p-value                        5.195502e-10
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.824980
p-value                          0.002662
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.128048
p-value                         0.000870
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -8.821907e+00
p-value                        1.867448e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -6.478806e+00
p-value                        1.310640e-08
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -6.964843e+00
p-value                        8.969965e-10
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -6.650508e+00
p-value                        5.135188e-09
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.728604e+00
p-value                        3.237346e-14
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -6.913090e+00
p-value                        1.198409e-09
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -2.236194
p-value                         0.193372
#Lags Used                      8.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -3.614496
p-value                         0.005490
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.632815
p-value                         0.000001
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.283190
p-value                         0.000006
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

2. MaxMinScaler Transfromation (Normalization)

In [14]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=MinMaxScaler(feature_range=(0,1))
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_nor=demand_nor.flatten()
    demand_nor=pd.Series(demand_nor,index=index)
    test_stationarity(demand_nor)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.358995e+00
p-value                        2.501454e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.455822e+00
p-value                        1.484382e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.549097e+00
p-value                        3.224296e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.920508
p-value                          0.001889
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -5.852893e+00
p-value                        3.555029e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.778959e+00
p-value                        6.814323e-17
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.837813
p-value                         0.000046
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -8.544093e+00
p-value                        9.607190e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -8.476088e+00
p-value                        1.434169e-13
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -7.826999e+00
p-value                        6.441844e-12
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.425173e+00
p-value                        6.577578e-11
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.172061
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.044954
p-value                         0.267216
#Lags Used                      5.000000
Number of Observations Used    24.000000
Critical Value (1%)            -3.737709
Critical Value (5%)            -2.992216
Critical Value (10%)           -2.635747
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.225241
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.169368
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

3. StandardScaler Transformation (Standardization)

In [15]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    demand_values=demand_values.reshape(len(demand_values),1)
    std_scaler=StandardScaler()
    std_scaler_fit=std_scaler.fit(demand_values)
    demand_std=std_scaler.transform(demand_values)
    index=d_it.index
    demand_std=demand_std.flatten()
    demand_std=pd.Series(demand_std,index=index)
    test_stationarity(demand_std)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.358995e+00
p-value                        2.501454e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.455822e+00
p-value                        1.484382e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.549097e+00
p-value                        3.224296e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.920508
p-value                          0.001889
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -5.852893e+00
p-value                        3.555029e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.778959e+00
p-value                        6.814323e-17
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.837813
p-value                         0.000046
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -8.544093e+00
p-value                        9.607190e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -8.476088e+00
p-value                        1.434169e-13
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -7.826999e+00
p-value                        6.441844e-12
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.425173e+00
p-value                        6.577578e-11
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.172061
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.044954
p-value                         0.267216
#Lags Used                      5.000000
Number of Observations Used    24.000000
Critical Value (1%)            -3.737709
Critical Value (5%)            -2.992216
Critical Value (10%)           -2.635747
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.225241
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.169368
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

4. Square Root Transformation

In [16]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_sqrt=np.sqrt(d_it)
    test_stationarity(demand_sqrt)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.123122e+00
p-value                        8.757855e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.981011e+00
p-value                        1.838566e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.049392e+00
p-value                        5.577281e-10
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.722397
p-value                          0.003809
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.361318
p-value                         0.000347
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.399173e+00
p-value                        6.254700e-16
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -5.453426
p-value                         0.000003
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -7.544139e+00
p-value                        3.317823e-11
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -7.473102e+00
p-value                        4.994558e-11
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.274900e+00
p-value                        4.685068e-13
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.097469e+00
p-value                        4.252416e-10
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.295100
p-value                         0.000006
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -3.582385
p-value                         0.006107
#Lags Used                      1.000000
Number of Observations Used    28.000000
Critical Value (1%)            -3.688926
Critical Value (5%)            -2.971989
Critical Value (10%)           -2.625296
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.424877
p-value                         0.000003
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.191720
p-value                         0.000009
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

5. Cube Root Transfromation

In [130]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_cbrt=np.cbrt(d_it)
    test_stationarity(demand_cbrt)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.148129e+00
p-value                        7.678167e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.898106e+00
p-value                        2.819881e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -6.991222e+00
p-value                        7.735960e-10
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.716545
p-value                          0.003886
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.270328
p-value                         0.000500
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.210272e+00
p-value                        1.896004e-15
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -5.775922e+00
p-value                        5.259927e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -7.276472e+00
p-value                        1.539766e-10
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -7.153826e+00
p-value                        3.091522e-10
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.430370e+00
p-value                        1.877267e-13
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.017947e+00
p-value                        6.657201e-10
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.311113
p-value                         0.000005
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -3.481670
p-value                         0.008471
#Lags Used                      1.000000
Number of Observations Used    28.000000
Critical Value (1%)            -3.688926
Critical Value (5%)            -2.971989
Critical Value (10%)           -2.625296
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.494984
p-value                         0.000002
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.215911
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

6. Differencing Transformation (Shift 1)

In [19]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_diff=d_it.diff(periods=1)
    demand_diff.dropna(inplace=True)
    test_stationarity(demand_diff)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -8.781051e+00
p-value                        2.376155e-14
#Lags Used                     2.000000e+00
Number of Observations Used    1.040000e+02
Critical Value (1%)           -3.494850e+00
Critical Value (5%)           -2.889758e+00
Critical Value (10%)          -2.581822e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.870140e+00
p-value                        3.254774e-07
#Lags Used                     8.000000e+00
Number of Observations Used    9.200000e+01
Critical Value (1%)           -3.503515e+00
Critical Value (5%)           -2.893508e+00
Critical Value (10%)          -2.583824e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -6.214430e+00
p-value                        5.408936e-08
#Lags Used                     6.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                -1.120299e+01
p-value                        2.219986e-20
#Lags Used                     1.000000e+00
Number of Observations Used    1.000000e+02
Critical Value (1%)           -3.497501e+00
Critical Value (5%)           -2.890906e+00
Critical Value (10%)          -2.582435e+00
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -9.276647e+00
p-value                        1.283643e-15
#Lags Used                     2.000000e+00
Number of Observations Used    9.100000e+01
Critical Value (1%)           -3.504343e+00
Critical Value (5%)           -2.893866e+00
Critical Value (10%)          -2.584015e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -6.091155e+00
p-value                        1.035720e-07
#Lags Used                     6.000000e+00
Number of Observations Used    8.000000e+01
Critical Value (1%)           -3.514869e+00
Critical Value (5%)           -2.898409e+00
Critical Value (10%)          -2.586439e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -9.327661e+00
p-value                        9.514078e-16
#Lags Used                     1.000000e+00
Number of Observations Used    9.500000e+01
Critical Value (1%)           -3.501137e+00
Critical Value (5%)           -2.892480e+00
Critical Value (10%)          -2.583275e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -6.054187e+00
p-value                        1.256607e-07
#Lags Used                     6.000000e+00
Number of Observations Used    7.700000e+01
Critical Value (1%)           -3.518281e+00
Critical Value (5%)           -2.899878e+00
Critical Value (10%)          -2.587223e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -7.838787e+00
p-value                        6.014589e-12
#Lags Used                     3.000000e+00
Number of Observations Used    7.800000e+01
Critical Value (1%)           -3.517114e+00
Critical Value (5%)           -2.899375e+00
Critical Value (10%)          -2.586955e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                 -4.654294
p-value                         0.000102
#Lags Used                     11.000000
Number of Observations Used    64.000000
Critical Value (1%)            -3.536928
Critical Value (5%)            -2.907887
Critical Value (10%)           -2.591493
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.126322e+00
p-value                        3.612414e-10
#Lags Used                     5.000000e+00
Number of Observations Used    6.100000e+01
Critical Value (1%)           -3.542413e+00
Critical Value (5%)           -2.910236e+00
Critical Value (10%)          -2.592745e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                -9.242078e+00
p-value                        1.572708e-15
#Lags Used                     1.000000e+00
Number of Observations Used    5.900000e+01
Critical Value (1%)           -3.546395e+00
Critical Value (5%)           -2.911939e+00
Critical Value (10%)          -2.593652e+00
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.073325
p-value                         0.255375
#Lags Used                      7.000000
Number of Observations Used    21.000000
Critical Value (1%)            -3.788386
Critical Value (5%)            -3.013098
Critical Value (10%)           -2.646397
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.196348
p-value                         0.000009
#Lags Used                      4.000000
Number of Observations Used    40.000000
Critical Value (1%)            -3.605565
Critical Value (5%)            -2.937069
Critical Value (10%)           -2.606986
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                -7.802775e+00
p-value                        7.416921e-12
#Lags Used                     1.000000e+00
Number of Observations Used    3.300000e+01
Critical Value (1%)           -3.646135e+00
Critical Value (5%)           -2.954127e+00
Critical Value (10%)          -2.615968e+00
dtype: float64
------------------------------------------------------

7. Differencing Transformation (Shift 2)

In [20]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_diff2=d_it.diff(periods=2)
    demand_diff2.dropna(inplace=True)
    test_stationarity(demand_diff2)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                 -4.737067
p-value                         0.000072
#Lags Used                      9.000000
Number of Observations Used    96.000000
Critical Value (1%)            -3.500379
Critical Value (5%)            -2.892152
Critical Value (10%)           -2.583100
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                 -4.107006
p-value                         0.000943
#Lags Used                      8.000000
Number of Observations Used    91.000000
Critical Value (1%)            -3.504343
Critical Value (5%)            -2.893866
Critical Value (10%)           -2.584015
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                 -4.986066
p-value                         0.000024
#Lags Used                     11.000000
Number of Observations Used    88.000000
Critical Value (1%)            -3.506944
Critical Value (5%)            -2.894990
Critical Value (10%)           -2.584615
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                 -3.909102
p-value                         0.001969
#Lags Used                      6.000000
Number of Observations Used    94.000000
Critical Value (1%)            -3.501912
Critical Value (5%)            -2.892815
Critical Value (10%)           -2.583454
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.327771
p-value                         0.000398
#Lags Used                     11.000000
Number of Observations Used    81.000000
Critical Value (1%)            -3.513790
Critical Value (5%)            -2.897943
Critical Value (10%)           -2.586191
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -5.858946e+00
p-value                        3.446686e-07
#Lags Used                     7.000000e+00
Number of Observations Used    7.800000e+01
Critical Value (1%)           -3.517114e+00
Critical Value (5%)           -2.899375e+00
Critical Value (10%)          -2.586955e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.504134
p-value                         0.000193
#Lags Used                     11.000000
Number of Observations Used    84.000000
Critical Value (1%)            -3.510712
Critical Value (5%)            -2.896616
Critical Value (10%)           -2.585482
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                 -4.519464
p-value                         0.000181
#Lags Used                     11.000000
Number of Observations Used    71.000000
Critical Value (1%)            -3.526005
Critical Value (5%)            -2.903200
Critical Value (10%)           -2.588995
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                 -4.965984
p-value                         0.000026
#Lags Used                      7.000000
Number of Observations Used    73.000000
Critical Value (1%)            -3.523284
Critical Value (5%)            -2.902031
Critical Value (10%)           -2.588371
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                 -5.566438
p-value                         0.000002
#Lags Used                      7.000000
Number of Observations Used    67.000000
Critical Value (1%)            -3.531955
Critical Value (5%)            -2.905755
Critical Value (10%)           -2.590357
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -6.475681e+00
p-value                        1.333028e-08
#Lags Used                     5.000000e+00
Number of Observations Used    6.000000e+01
Critical Value (1%)           -3.544369e+00
Critical Value (5%)           -2.911073e+00
Critical Value (10%)          -2.593190e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -3.785199
p-value                         0.003062
#Lags Used                      5.000000
Number of Observations Used    54.000000
Critical Value (1%)            -3.557709
Critical Value (5%)            -2.916770
Critical Value (10%)           -2.596222
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.419916
p-value                         0.136194
#Lags Used                      4.000000
Number of Observations Used    23.000000
Critical Value (1%)            -3.752928
Critical Value (5%)            -2.998500
Critical Value (10%)           -2.638967
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                -5.946126e+00
p-value                        2.202099e-07
#Lags Used                     3.000000e+00
Number of Observations Used    4.000000e+01
Critical Value (1%)           -3.605565e+00
Critical Value (5%)           -2.937069e+00
Critical Value (10%)          -2.606986e+00
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -3.167373
p-value                         0.021953
#Lags Used                      2.000000
Number of Observations Used    31.000000
Critical Value (1%)            -3.661429
Critical Value (5%)            -2.960525
Critical Value (10%)           -2.619319
dtype: float64
------------------------------------------------------

8. Differencing Transformation (Shift 3)

In [21]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_diff3=d_it.diff(periods=3)
    demand_diff3.dropna(inplace=True)
    test_stationarity(demand_diff3)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                 -5.140154
p-value                         0.000012
#Lags Used                      8.000000
Number of Observations Used    96.000000
Critical Value (1%)            -3.500379
Critical Value (5%)            -2.892152
Critical Value (10%)           -2.583100
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.664181e+00
p-value                        4.763508e-09
#Lags Used                     5.000000e+00
Number of Observations Used    9.300000e+01
Critical Value (1%)           -3.502705e+00
Critical Value (5%)           -2.893158e+00
Critical Value (10%)          -2.583637e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                 -4.974265
p-value                         0.000025
#Lags Used                     11.000000
Number of Observations Used    87.000000
Critical Value (1%)            -3.507853
Critical Value (5%)            -2.895382
Critical Value (10%)           -2.584824
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                 -3.904697
p-value                         0.002001
#Lags Used                     11.000000
Number of Observations Used    88.000000
Critical Value (1%)            -3.506944
Critical Value (5%)            -2.894990
Critical Value (10%)           -2.584615
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.667659
p-value                         0.000097
#Lags Used                     11.000000
Number of Observations Used    80.000000
Critical Value (1%)            -3.514869
Critical Value (5%)            -2.898409
Critical Value (10%)           -2.586439
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -7.207363e+00
p-value                        2.281701e-10
#Lags Used                     5.000000e+00
Number of Observations Used    7.900000e+01
Critical Value (1%)           -3.515977e+00
Critical Value (5%)           -2.898886e+00
Critical Value (10%)          -2.586694e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.455093
p-value                         0.000237
#Lags Used                     10.000000
Number of Observations Used    84.000000
Critical Value (1%)            -3.510712
Critical Value (5%)            -2.896616
Critical Value (10%)           -2.585482
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                 -4.849616
p-value                         0.000044
#Lags Used                      8.000000
Number of Observations Used    73.000000
Critical Value (1%)            -3.523284
Critical Value (5%)            -2.902031
Critical Value (10%)           -2.588371
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                 -4.698544
p-value                         0.000085
#Lags Used                      8.000000
Number of Observations Used    71.000000
Critical Value (1%)            -3.526005
Critical Value (5%)            -2.903200
Critical Value (10%)           -2.588995
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                 -2.970576
p-value                         0.037735
#Lags Used                     12.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                 -4.150344
p-value                         0.000799
#Lags Used                     11.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -4.560138
p-value                         0.000153
#Lags Used                      5.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -3.991674
p-value                         0.001455
#Lags Used                      7.000000
Number of Observations Used    19.000000
Critical Value (1%)            -3.832603
Critical Value (5%)            -3.031227
Critical Value (10%)           -2.655520
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -2.884459
p-value                         0.047185
#Lags Used                      8.000000
Number of Observations Used    34.000000
Critical Value (1%)            -3.639224
Critical Value (5%)            -2.951230
Critical Value (10%)           -2.614447
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -4.987987
p-value                         0.000023
#Lags Used                      0.000000
Number of Observations Used    32.000000
Critical Value (1%)            -3.653520
Critical Value (5%)            -2.957219
Critical Value (10%)           -2.617588
dtype: float64
------------------------------------------------------

9. Power Transformation ( Box-Cox )

In [22]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values   
    scaler=PowerTransformer(method='box-cox')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_pow_bc=demand_nor.flatten()
    demand_pow_bc=pd.Series(demand_pow_bc,index=index)
    test_stationarity(demand_pow_bc)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.128917e+00
p-value                        8.495108e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.888379e+00
p-value                        2.964264e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -6.986417e+00
p-value                        7.947461e-10
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.723352
p-value                          0.003796
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.226464
p-value                         0.000594
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -6.945589e+00
p-value                        9.991792e-10
#Lags Used                     1.000000e+00
Number of Observations Used    8.600000e+01
Critical Value (1%)           -3.508783e+00
Critical Value (5%)           -2.895784e+00
Critical Value (10%)          -2.585038e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -6.495736e+00
p-value                        1.195632e-08
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -6.958333e+00
p-value                        9.303343e-10
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -6.464876e+00
p-value                        1.413381e-08
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.797404e+00
p-value                        2.157732e-14
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -6.872184e+00
p-value                        1.505802e-09
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -2.335470
p-value                         0.160783
#Lags Used                      8.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -4.304169
p-value                         0.000437
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                -5.839040e+00
p-value                        3.815678e-07
#Lags Used                     0.000000e+00
Number of Observations Used    4.500000e+01
Critical Value (1%)           -3.584829e+00
Critical Value (5%)           -2.928299e+00
Critical Value (10%)          -2.602344e+00
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -1.458653
p-value                         0.553857
#Lags Used                      8.000000
Number of Observations Used    27.000000
Critical Value (1%)            -3.699608
Critical Value (5%)            -2.976430
Critical Value (10%)           -2.627601
dtype: float64
------------------------------------------------------

10. Power Transformation ( Yeo-Johnson )

In [23]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values   
    scaler=PowerTransformer(method='yeo-johnson')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_pow_yj=demand_nor.flatten()
    demand_pow_yj=pd.Series(demand_pow_yj,index=index)
    test_stationarity(demand_pow_yj)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.110750e+00
p-value                        9.345797e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.882487e+00
p-value                        3.055227e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -6.914257e+00
p-value                        1.190617e-09
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.706313
p-value                          0.004025
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.236486
p-value                         0.000571
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -6.906714e+00
p-value                        1.241870e-09
#Lags Used                     1.000000e+00
Number of Observations Used    8.600000e+01
Critical Value (1%)           -3.508783e+00
Critical Value (5%)           -2.895784e+00
Critical Value (10%)          -2.585038e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -6.476933e+00
p-value                        1.324011e-08
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -6.860720e+00
p-value                        1.605136e-09
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -6.478108e+00
p-value                        1.315603e-08
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.713230e+00
p-value                        3.544540e-14
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -6.846923e+00
p-value                        1.733314e-09
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -2.327494
p-value                         0.163254
#Lags Used                      8.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -4.271934
p-value                         0.000497
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                -5.792293e+00
p-value                        4.840785e-07
#Lags Used                     0.000000e+00
Number of Observations Used    4.500000e+01
Critical Value (1%)           -3.584829e+00
Critical Value (5%)           -2.928299e+00
Critical Value (10%)          -2.602344e+00
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -1.449566
p-value                         0.558297
#Lags Used                      8.000000
Number of Observations Used    27.000000
Critical Value (1%)            -3.699608
Critical Value (5%)            -2.976430
Critical Value (10%)           -2.627601
dtype: float64
------------------------------------------------------

11. MaxAbsScaler Transformation

In [24]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=MaxAbsScaler()
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_abs=demand_nor.flatten()
    demand_abs=pd.Series(demand_abs,index=index)
    test_stationarity(demand_abs)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.358995e+00
p-value                        2.501454e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.455822e+00
p-value                        1.484382e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.549097e+00
p-value                        3.224296e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.920508
p-value                          0.001889
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -5.852893e+00
p-value                        3.555029e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.778959e+00
p-value                        6.814323e-17
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.837813
p-value                         0.000046
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -8.544093e+00
p-value                        9.607190e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -8.476088e+00
p-value                        1.434169e-13
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -7.826999e+00
p-value                        6.441844e-12
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.425173e+00
p-value                        6.577578e-11
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.172061
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.044954
p-value                         0.267216
#Lags Used                      5.000000
Number of Observations Used    24.000000
Critical Value (1%)            -3.737709
Critical Value (5%)            -2.992216
Critical Value (10%)           -2.635747
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.225241
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.169368
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

12. Robust Scaler Transformation

In [25]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=RobustScaler()
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_rs=demand_nor.flatten()
    demand_rs=pd.Series(demand_rs,index=index)
    test_stationarity(demand_rs)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.358995e+00
p-value                        2.501454e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -6.455822e+00
p-value                        1.484382e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -7.549097e+00
p-value                        3.224296e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.920508
p-value                          0.001889
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                -5.852893e+00
p-value                        3.555029e-07
#Lags Used                     0.000000e+00
Number of Observations Used    9.400000e+01
Critical Value (1%)           -3.501912e+00
Critical Value (5%)           -2.892815e+00
Critical Value (10%)          -2.583454e+00
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -9.778959e+00
p-value                        6.814323e-17
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                 -4.837813
p-value                         0.000046
#Lags Used                      0.000000
Number of Observations Used    97.000000
Critical Value (1%)            -3.499637
Critical Value (5%)            -2.891831
Critical Value (10%)           -2.582928
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -8.544093e+00
p-value                        9.607190e-14
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -8.476088e+00
p-value                        1.434169e-13
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -7.826999e+00
p-value                        6.441844e-12
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -7.425173e+00
p-value                        6.577578e-11
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.172061
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -2.044954
p-value                         0.267216
#Lags Used                      5.000000
Number of Observations Used    24.000000
Critical Value (1%)            -3.737709
Critical Value (5%)            -2.992216
Critical Value (10%)           -2.635747
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.225241
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.169368
p-value                         0.000010
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------

13. Quantile Transformation ( Uniform )

In [26]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=QuantileTransformer(output_distribution='uniform')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_qtu=demand_nor.flatten()
    demand_qtu=pd.Series(demand_qtu,index=index)
    test_stationarity(demand_qtu)
    print('------------------------------------------------------')
Sales
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (108). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -5.964734e+00
p-value                        2.000204e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (102). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -3.451158
p-value                         0.009334
#Lags Used                      2.000000
Number of Observations Used    99.000000
Critical Value (1%)            -3.498198
Critical Value (5%)            -2.891208
Critical Value (10%)           -2.582596
dtype: float64
------------------------------------------------------
Business Consulting and Management
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (102). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.590355e+00
p-value                        7.140270e-09
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (103). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                  -3.685319
p-value                          0.004324
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (95). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -4.263988
p-value                         0.000513
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (88). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.756329e+00
p-value                        2.865592e-09
#Lags Used                     1.000000e+00
Number of Observations Used    8.600000e+01
Critical Value (1%)           -3.508783e+00
Critical Value (5%)           -2.895784e+00
Critical Value (10%)          -2.585038e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (98). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.745644e+00
p-value                        3.040036e-09
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (85). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.444621e+00
p-value                        1.577091e-08
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (83). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.265083e+00
p-value                        4.132845e-08
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (77). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -8.749215e+00
p-value                        2.866848e-14
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (68). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.820546e+00
p-value                        2.007154e-09
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (62). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -2.343655
p-value                         0.158275
#Lags Used                      8.000000
Number of Observations Used    53.000000
Critical Value (1%)            -3.560242
Critical Value (5%)            -2.917850
Critical Value (10%)           -2.596796
dtype: float64
------------------------------------------------------
Property and Consultation
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (30). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -4.317214
p-value                         0.000415
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (46). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -5.934617e+00
p-value                        2.336821e-07
#Lags Used                     0.000000e+00
Number of Observations Used    4.500000e+01
Critical Value (1%)           -3.584829e+00
Critical Value (5%)           -2.928299e+00
Critical Value (10%)          -2.602344e+00
dtype: float64
------------------------------------------------------
Hospitality and Event Management
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (36). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -1.471651
p-value                         0.547486
#Lags Used                      8.000000
Number of Observations Used    27.000000
Critical Value (1%)            -3.699608
Critical Value (5%)            -2.976430
Critical Value (10%)           -2.627601
dtype: float64
------------------------------------------------------

14. Quantile Transformation ( Normal )

In [27]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=QuantileTransformer(output_distribution='normal')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_qtn=demand_nor.flatten()
    demand_qtn=pd.Series(demand_qtn,index=index)
    test_stationarity(demand_qtn)
    print('------------------------------------------------------')
Sales
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (108). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -7.601102e+00
p-value                        2.388105e-11
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (102). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.320171e+00
p-value                        3.080031e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (102). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -9.399037e+00
p-value                        6.259669e-16
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (103). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                  -4.857334
p-value                          0.000042
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (95). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -4.620028
p-value                         0.000119
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (88). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -5.974293e+00
p-value                        1.903662e-07
#Lags Used                     2.000000e+00
Number of Observations Used    8.500000e+01
Critical Value (1%)           -3.509736e+00
Critical Value (5%)           -2.896195e+00
Critical Value (10%)          -2.585258e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (98). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -7.898862e+00
p-value                        4.238043e-12
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (85). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -7.773939e+00
p-value                        8.770742e-12
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (83). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -6.533636e+00
p-value                        9.730056e-09
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (77). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -9.383022e+00
p-value                        6.875869e-16
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (68). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -7.136262e+00
p-value                        3.414822e-10
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (62). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -5.345754
p-value                         0.000004
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (30). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                 -4.256079
p-value                         0.000529
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (46). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -5.942649e+00
p-value                        2.241978e-07
#Lags Used                     0.000000e+00
Number of Observations Used    4.500000e+01
Critical Value (1%)           -3.584829e+00
Critical Value (5%)           -2.928299e+00
Critical Value (10%)          -2.602344e+00
dtype: float64
------------------------------------------------------
Hospitality and Event Management
C:\Users\Aman\Anaconda3\lib\site-packages\sklearn\preprocessing\_data.py:2357: UserWarning: n_quantiles (1000) is greater than the total number of samples (36). n_quantiles is set to n_samples.
  % (self.n_quantiles, n_samples))
Results of Dickey-Fuller Test:
Test Statistic                -5.771570e+00
p-value                        5.377202e-07
#Lags Used                     0.000000e+00
Number of Observations Used    3.500000e+01
Critical Value (1%)           -3.632743e+00
Critical Value (5%)           -2.948510e+00
Critical Value (10%)          -2.613017e+00
dtype: float64
------------------------------------------------------

15. Function Transformation

In [28]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=FunctionTransformer(np.log1p)
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_ft=demand_nor.flatten()
    demand_ft=pd.Series(demand_ft,index=index)
    test_stationarity(demand_ft)
    print('------------------------------------------------------')
Sales
Results of Dickey-Fuller Test:
Test Statistic                -6.229858e+00
p-value                        4.983949e-08
#Lags Used                     0.000000e+00
Number of Observations Used    1.070000e+02
Critical Value (1%)           -3.492996e+00
Critical Value (5%)           -2.888955e+00
Critical Value (10%)          -2.581393e+00
dtype: float64
------------------------------------------------------
Information Technology
Results of Dickey-Fuller Test:
Test Statistic                -5.868223e+00
p-value                        3.286893e-07
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Business Consulting and Management
Results of Dickey-Fuller Test:
Test Statistic                -6.917668e+00
p-value                        1.168130e-09
#Lags Used                     0.000000e+00
Number of Observations Used    1.010000e+02
Critical Value (1%)           -3.496818e+00
Critical Value (5%)           -2.890611e+00
Critical Value (10%)          -2.582277e+00
dtype: float64
------------------------------------------------------
Retail
Results of Dickey-Fuller Test:
Test Statistic                  -3.728389
p-value                          0.003731
#Lags Used                       1.000000
Number of Observations Used    101.000000
Critical Value (1%)             -3.496818
Critical Value (5%)             -2.890611
Critical Value (10%)            -2.582277
dtype: float64
------------------------------------------------------
Marketing, Advertising and PR
Results of Dickey-Fuller Test:
Test Statistic                 -4.190070
p-value                         0.000685
#Lags Used                      1.000000
Number of Observations Used    93.000000
Critical Value (1%)            -3.502705
Critical Value (5%)            -2.893158
Critical Value (10%)           -2.583637
dtype: float64
------------------------------------------------------
Teacher Training and Education
Results of Dickey-Fuller Test:
Test Statistic                -8.934530e+00
p-value                        9.613822e-15
#Lags Used                     0.000000e+00
Number of Observations Used    8.700000e+01
Critical Value (1%)           -3.507853e+00
Critical Value (5%)           -2.895382e+00
Critical Value (10%)          -2.584824e+00
dtype: float64
------------------------------------------------------
Accounting, Finance and Banking
Results of Dickey-Fuller Test:
Test Statistic                -6.103998e+00
p-value                        9.682859e-08
#Lags Used                     0.000000e+00
Number of Observations Used    9.700000e+01
Critical Value (1%)           -3.499637e+00
Critical Value (5%)           -2.891831e+00
Critical Value (10%)          -2.582928e+00
dtype: float64
------------------------------------------------------
Engineering and Manufacturing
Results of Dickey-Fuller Test:
Test Statistic                -6.995656e+00
p-value                        7.545713e-10
#Lags Used                     0.000000e+00
Number of Observations Used    8.400000e+01
Critical Value (1%)           -3.510712e+00
Critical Value (5%)           -2.896616e+00
Critical Value (10%)          -2.585482e+00
dtype: float64
------------------------------------------------------
Healthcare
Results of Dickey-Fuller Test:
Test Statistic                -6.930377e+00
p-value                        1.087987e-09
#Lags Used                     0.000000e+00
Number of Observations Used    8.200000e+01
Critical Value (1%)           -3.512738e+00
Critical Value (5%)           -2.897490e+00
Critical Value (10%)          -2.585949e+00
dtype: float64
------------------------------------------------------
Media and Internet
Results of Dickey-Fuller Test:
Test Statistic                -8.452000e+00
p-value                        1.652767e-13
#Lags Used                     1.000000e+00
Number of Observations Used    7.500000e+01
Critical Value (1%)           -3.520713e+00
Critical Value (5%)           -2.900925e+00
Critical Value (10%)          -2.587781e+00
dtype: float64
------------------------------------------------------
Recruitment and HR
Results of Dickey-Fuller Test:
Test Statistic                -6.957990e+00
p-value                        9.321231e-10
#Lags Used                     0.000000e+00
Number of Observations Used    6.700000e+01
Critical Value (1%)           -3.531955e+00
Critical Value (5%)           -2.905755e+00
Critical Value (10%)          -2.590357e+00
dtype: float64
------------------------------------------------------
Transport and Logistics
Results of Dickey-Fuller Test:
Test Statistic                 -5.348490
p-value                         0.000004
#Lags Used                      0.000000
Number of Observations Used    61.000000
Critical Value (1%)            -3.542413
Critical Value (5%)            -2.910236
Critical Value (10%)           -2.592745
dtype: float64
------------------------------------------------------
Property and Consultation
Results of Dickey-Fuller Test:
Test Statistic                 -3.505275
p-value                         0.007853
#Lags Used                      0.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64
------------------------------------------------------
Public Service and Administration
Results of Dickey-Fuller Test:
Test Statistic                 -5.499734
p-value                         0.000002
#Lags Used                      0.000000
Number of Observations Used    45.000000
Critical Value (1%)            -3.584829
Critical Value (5%)            -2.928299
Critical Value (10%)           -2.602344
dtype: float64
------------------------------------------------------
Hospitality and Event Management
Results of Dickey-Fuller Test:
Test Statistic                 -5.211292
p-value                         0.000008
#Lags Used                      0.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64
------------------------------------------------------
In [12]:
p=d=q=range(0,5)
pdq=list(itertools.product(p,d,q))
pdq[0:10]
Out[12]:
[(0, 0, 0),
 (0, 0, 1),
 (0, 0, 2),
 (0, 0, 3),
 (0, 0, 4),
 (0, 1, 0),
 (0, 1, 1),
 (0, 1, 2),
 (0, 1, 3),
 (0, 1, 4)]
In [13]:
def arima_model(ts): 
    aic=[]
    for param in pdq:   
        try:
            demand_model=ARIMA(ts,order=param)
            demand_model_fit=demand_model.fit()
            #print(param,"AIC = "+str(demand_model_fit.aic))
            par=[param,demand_model_fit.aic]
            aic.append(par)
        except:
            continue

    aic_df=pd.DataFrame(aic)
    aic_df=aic_df.set_index(aic_df[0])
    aic_df=aic_df[1]
    aic_sort=aic_df.sort_values(ascending=True)
    best_aic=aic_sort.index[0]
    print("Best values of p, d & q for ARIMA model are p = %d, d = %d & q = %d"%(best_aic[0],best_aic[1],best_aic[2]))
    print("AIC =",aic_sort[0])
    #print("____________________________________________________________________")
    
    t=len(ts)-25
    demand_train=ts[0:t]
    demand_test=ts[t:]
    demand_forecast=demand_model_fit.forecast(steps=25)[0]
    jobs_error=mean_squared_error(demand_test,demand_forecast)
    print("RSME = ",np.sqrt(jobs_error))
    print("_____________________________________________________________________")

Log Transfromation

In [27]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_log=np.log(d_it)
    #demand_log.dropna(inplace=True)
    arima_model(demand_log)
    #print('------------------------------------------------------')
Sales
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 4
AIC = 275.32903520159
RSME =  0.7885031416417557
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 3
AIC = 253.40671845927145
RSME =  0.8840069347196734
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 262.4973573806667
RSME =  1.7670658929889056
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 236.82492103898926
RSME =  0.7984958927099023
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 4
AIC = 214.7882752872958
RSME =  2.626644747822856
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 250.81066953096996
RSME =  1.4409725511734173
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 0
AIC = 228.18202144026634
RSME =  1.1721869981824249
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 216.38192766130246
RSME =  1.2925426251115566
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 191.0871643702017
RSME =  1.059045847109904
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 143.91949590770145
RSME =  1.142449145567524
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 141.4395427936887
RSME =  3.3894481186143715
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 3
AIC = 124.63787338743924
RSME =  2.095083778980159
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 79.95939870898971
RSME =  1.159842137228647
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 69.14018284152695
RSME =  0.9471349807026902
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 50.39389595517381
RSME =  0.5518892975528984
_____________________________________________________________________

MinMax Scaler

In [24]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=MinMaxScaler(feature_range=(0,1))
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_nor=demand_nor.flatten()
    demand_nor=pd.Series(demand_nor,index=index)
    arima_model(demand_nor)
Sales
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = -31.679898773477078
RSME =  0.27598260417463627
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -90.03618055373869
RSME =  0.12156570808590199
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -65.49811472400546
RSME =  0.36745234416886385
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = -97.88294780926742
RSME =  0.14397739626315423
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -37.55106689825203
RSME =  0.3033692266971649
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -54.1569143666828
RSME =  0.20608484515930925
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -129.95544657368754
RSME =  0.09522418851524139
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -103.74043534822974
RSME =  0.23346959178764104
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -85.83418501357926
RSME =  0.2522918856312856
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 0.12919967057345616
RSME =  0.29856761611170696
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -35.918505247168596
RSME =  0.23304779700350442
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = -42.691896587625905
RSME =  0.31640499715053744
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = -9.516883544860207
RSME =  0.5772913037942815
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 0.30968129483377993
RSME =  0.43334738055647287
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 0.44737333757701236
RSME =  0.3009797633623522
_____________________________________________________________________

Standard Scaler

In [25]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    demand_values=demand_values.reshape(len(demand_values),1)
    std_scaler=StandardScaler()
    std_scaler_fit=std_scaler.fit(demand_values)
    demand_std=std_scaler.transform(demand_values)
    index=d_it.index
    demand_std=demand_std.flatten()
    demand_std=pd.Series(demand_std,index=index)
    arima_model(demand_std)
Sales
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 286.6008725303059
RSME =  1.204585899908434
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 277.179610728475
RSME =  0.7511068179247948
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 288.13445947847026
RSME =  2.0799075227141732
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 269.43537789345635
RSME =  1.036945056755207
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 253.7137687180417
RSME =  1.4713935314192192
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 253.7331818440224
RSME =  1.185191941010428
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 239.28166306167412
RSME =  0.8723443511624673
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 245.2195506447943
RSME =  1.8184668759877956
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 239.54379651197567
RSME =  1.3867401998026565
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 222.51653411351955
RSME =  1.265296670606006
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 196.97564051583544
RSME =  1.2918634218064693
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 169.9175568244902
RSME =  1.66353273641682
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 79.6340027891562
RSME =  2.090198292197126
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 133.58308453057623
RSME =  1.8448643319975888
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 106.16357439073643
RSME =  1.1545561173848224
_____________________________________________________________________

Square Root Transformation

In [26]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_sqrt=np.sqrt(d_it)
    arima_model(demand_sqrt)
Sales
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 4
AIC = 343.7982498514538
RSME =  1.3976109589716599
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 4, d = 1 & q = 3
AIC = 353.8453169708843
RSME =  2.2108098692793607
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 325.2279427507536
RSME =  2.9257245278710267
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 275.388833182595
RSME =  1.702883572615183
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 252.40684043589732
RSME =  2.1449918824341627
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 292.3111940003716
RSME =  1.5509057005759983
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 243.50007046125276
RSME =  0.6419060446587455
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 237.09374711681485
RSME =  1.4759356941363422
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 193.18373918254866
RSME =  1.1267276412852822
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 108.16891327601577
RSME =  0.8227285419648378
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 2
AIC = 117.64397047259858
RSME =  0.7230678732902366
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 1
AIC = 109.26276964955423
RSME =  1.4093987410844104
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 89.6187217194704
RSME =  2.9031539740481693
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 33.01443630794037
RSME =  0.6299410453505103
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 23.203934404342093
RSME =  0.5523165187278102
_____________________________________________________________________

Cube Root Transformation

In [25]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_cbrt=np.cbrt(d_it)
    arima_model(demand_cbrt)
Sales
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 4
AIC = 175.52461748547887
RSME =  0.574353384796916
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 176.9226735583813
RSME =  0.8906128351349843
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 169.1826956671387
RSME =  1.346920343161983
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 125.26980513729899
RSME =  0.6517309483320435
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 115.07376356389432
RSME =  1.210140681980262
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 160.3722470449173
RSME =  0.766494177811002
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 3
AIC = 106.6234326981579
RSME =  0.31657207050490727
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 116.58571977094621
RSME =  0.6819032926918319
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 82.84433999476332
RSME =  0.5135804498680497
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 20.51892937275437
RSME =  0.4841136186477728
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 2
AIC = 37.008501048962216
RSME =  0.36154061403856963
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 3
AIC = 34.09511985687871
RSME =  0.8198580617119491
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 46.437103438504906
RSME =  0.6297048292787004
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -13.949782686634101
RSME =  0.3804499937263496
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -14.025356426959249
RSME =  0.23276121312372322
_____________________________________________________________________

Differencing Transformation (Shift 1)

In [24]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_diff=d_it.diff(periods=1)
    demand_diff.dropna(inplace=True)
    arima_model(demand_diff)
Sales
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 3
AIC = 763.4420700947577
RSME =  11.10777010949052
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 810.6585470209013
RSME =  17.53976289958379
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 715.5868820607197
RSME =  13.820580614410387
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 649.0437014750084
RSME =  5.309075050095879
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 588.8586896962495
RSME =  6.253372500698171
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 629.6886667144853
RSME =  12.56058981175724
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 587.5673938620794
RSME =  5.906034460185458
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 544.4831180426154
RSME =  13.058548874512084
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 462.6655057389737
RSME =  7.662928099655245
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 1
AIC = 299.9132888629664
RSME =  1.6940535058813406
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 302.24182609514105
RSME =  2.647375865169732
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 285.9539942812547
RSME =  2.2734042715915295
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 197.13097906783426
RSME =  10.396307695605486
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 1
AIC = 130.83775258338858
RSME =  1.1444302595295417
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 103.92424083149437
RSME =  1.3219036684239411
_____________________________________________________________________

Differencing Transformation (Shift 2)

In [23]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_diff2=d_it.diff(periods=2)
    demand_diff2.dropna(inplace=True)
    arima_model(demand_diff2)
Sales
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 2
AIC = 761.2872601913655
RSME =  11.002423591936303
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 3
AIC = 808.6923859274741
RSME =  6.328482769150546
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 2
AIC = 714.8815911690676
RSME =  13.318524881458812
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 648.967812972407
RSME =  4.215252052422355
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 2
AIC = 587.9566046918537
RSME =  7.728637912947237
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 628.6056011620414
RSME =  13.162951978003726
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 2
AIC = 588.3133541035083
RSME =  3.524205309148304
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 542.8621463245539
RSME =  18.730202119695697
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 461.6880543600978
RSME =  7.902521387882865
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 1, d = 1 & q = 4
AIC = 300.3647173487541
RSME =  1.5446594012619526
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 302.91996552601427
RSME =  3.6885189213420952
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 3
AIC = 288.4260052815111
RSME =  2.517991147322248
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 202.84713897921574
RSME =  12.856254917521948
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 3
AIC = 133.39177605650562
RSME =  1.2084908407764696
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 2
AIC = 106.41106759328957
RSME =  2.1760562999353135
_____________________________________________________________________

Power Transformation ( Box-Cox )

In [17]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values   
    scaler=PowerTransformer(method='box-cox')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_pow_bc=demand_nor.flatten()
    demand_pow_bc=pd.Series(demand_pow_bc,index=index)
    arima_model(demand_pow_bc)
Sales
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 4
AIC = 281.7555905495641
RSME =  1.126114384032662
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 262.7484362796766
RSME =  1.456052144463827
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 283.9949473487979
RSME =  1.9400654245256326
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 263.65028286849974
RSME =  1.3140004224393844
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 3
AIC = 251.15683276374352
RSME =  2.667287940097274
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 253.7331818440224
RSME =  1.5396657105401912
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 0
AIC = 265.5486604329857
RSME =  0.6931391658221512
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 241.21513482989081
RSME =  1.4943092424531794
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 233.70235737416704
RSME =  2.442447751254534
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 218.34103136888598
RSME =  1.8702410107564764
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 196.71397106436237
RSME =  4.993690484780127
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 3
AIC = 166.8853626682173
RSME =  3.008697665021374
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 89.13631199228035
RSME =  4.8728178016955175
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 134.54234505482987
RSME =  1.2771184467011414
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 106.16357439073641
RSME =  1.0436649204851303
_____________________________________________________________________

Power Transformation ( Yeo-Johnson )

In [18]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values   
    scaler=PowerTransformer(method='yeo-johnson')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_pow_yj=demand_nor.flatten()
    demand_pow_yj=pd.Series(demand_pow_yj,index=index)
    arima_model(demand_pow_yj)
Sales
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 285.1217144408242
RSME =  0.9818772076321097
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 262.77914694651724
RSME =  0.9493838535179625
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 283.3134967948546
RSME =  2.405165266686819
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 262.9852648279075
RSME =  0.8384594680208399
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 251.01026416996655
RSME =  2.531991019803871
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 253.7331818440224
RSME =  1.5129471573236604
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 0
AIC = 265.16994950772386
RSME =  1.4637253059642659
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 240.5643874418053
RSME =  1.031009689080617
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 233.7501733404829
RSME =  2.008137667786385
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 218.7527240926738
RSME =  1.865792881341432
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 196.68581853039458
RSME =  5.062125667215472
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 4
AIC = 168.00565278798274
RSME =  3.0116512258897226
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 89.13631199228035
RSME =  4.324376595809386
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 134.54234505482987
RSME =  1.9381270320980348
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 106.16357439073641
RSME =  1.0485596664743293
_____________________________________________________________________

MaxAbsScaler Transformation

In [28]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=MaxAbsScaler()
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_abs=demand_nor.flatten()
    demand_abs=pd.Series(demand_abs,index=index)
    arima_model(demand_abs)
Sales
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = -37.01350309298522
RSME =  0.2692630053913599
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -92.4504099137273
RSME =  0.11956365268058505
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -69.88538058908222
RSME =  0.3596337767020275
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = -103.23389800833439
RSME =  0.13566804582965913
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -44.46091929071838
RSME =  0.3862140467411085
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -57.712590854566145
RSME =  0.20196309702062726
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = -134.91773693860836
RSME =  0.12925827925499073
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -107.4768494904215
RSME =  0.2283938717409874
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -91.8712139459445
RSME =  0.19896105101578293
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -20.43463479360298
RSME =  0.2612427732621336
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -46.804313490769516
RSME =  0.2151534162724743
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = -51.247012652007896
RSME =  0.2953081946075622
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = -11.363183064865382
RSME =  0.4586994927421273
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = -20.219525426073545
RSME =  0.3466791894347212
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = -15.618962357046101
RSME =  0.24078443443810327
_____________________________________________________________________

Robust Scaler Transformation

In [19]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=RobustScaler()
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_rs=demand_nor.flatten()
    demand_rs=pd.Series(demand_rs,index=index)
    arima_model(demand_rs)
Sales
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 228.37822696092405
RSME =  0.9200300526002491
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 365.6166326337085
RSME =  1.146356527587951
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 231.06395622674077
RSME =  1.5723398893520462
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 223.09484350832173
RSME =  0.8140697298220156
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 193.56404472340154
RSME =  1.3517422612281857
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 308.269120481085
RSME =  1.6157035570879055
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 316.38894128822596
RSME =  1.2920871149930468
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 269.7877427989275
RSME =  2.1012077994588507
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 278.9050948242332
RSME =  1.858755247172703
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 193.05469681886015
RSME =  1.0449872563382196
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 207.76078256784692
RSME =  1.39840964323702
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 198.60096189523293
RSME =  2.347353718032643
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 1 & q = 2
AIC = 197.13097906783426
RSME =  18.46743506396064
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 76.36411002780483
RSME =  0.9905070247887058
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 100.26056733820914
RSME =  1.2039178562108028
_____________________________________________________________________

Quantile Transformation ( Uniform )

In [20]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=QuantileTransformer(output_distribution='uniform')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_qtu=demand_nor.flatten()
    demand_qtu=pd.Series(demand_qtu,index=index)
    arima_model(demand_qtu)
Sales
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 1
AIC = 17.16358098510227
RSME =  0.3434380083017526
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 0
AIC = 16.018190844450544
RSME =  2.1289022703599847
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 29.356512392824754
RSME =  0.6943861385080367
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 9.805242039364316
RSME =  1.5149900683945772
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 15.427466328852688
RSME =  0.4097088482531141
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 49.274634806821396
RSME =  0.49915110781069477
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 4
AIC = 34.30028964020815
RSME =  0.49822620190911254
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 38.97328708766992
RSME =  0.2849576199282521
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 44.483984664391926
RSME =  0.663883630799776
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 53.83108395669083
RSME =  0.4801218602132597
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 53.008499348567085
RSME =  0.44570827057732443
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 4
AIC = 41.92212150286997
RSME =  1.1442535558526719
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 31.168878140643002
RSME =  1.7635518789057532
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 46.63159470123368
RSME =  0.9078596671194797
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 40.05518308579733
RSME =  0.41427890199721085
_____________________________________________________________________

Quantile Transformation ( Normal )

In [21]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=QuantileTransformer(output_distribution='normal')
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_qtn=demand_nor.flatten()
    demand_qtn=pd.Series(demand_qtn,index=index)
    arima_model(demand_qtn)
Sales
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 417.04999715834543
RSME =  1.5256795399406076
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 385.0670042794568
RSME =  5.300319429562077
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 378.8495288505094
RSME =  2.123459614971108
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 404.53776578503584
RSME =  2.657654035880869
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 2
AIC = 358.6661412553324
RSME =  2.6857873979537454
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 1
AIC = 400.19936318096836
RSME =  3.8135130630995113
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 424.9170831820246
RSME =  1.902744468376322
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 2, d = 1 & q = 3
AIC = 382.07610493746984
RSME =  3.025004190875835
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 386.91434426425883
RSME =  5.607992059423687
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 372.3001400851052
RSME =  4.904942943808859
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 338.1144446523362
RSME =  12.041511930345104
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 4
AIC = 301.8209896229709
RSME =  9.56332068732986
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 155.82438304058329
RSME =  11.515769133752777
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 239.03032109375002
RSME =  3.988808497082286
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 190.26705921068492
RSME =  3.5414634488197088
_____________________________________________________________________

Function Transformation

In [22]:
for i in sec.index[0:15]:
    it=df1[df1['Sector']==i]
    d_it=it.groupby('Posted_Date')['Posted_Date'].count()
    print(i)
    demand_values=d_it.values
    scaler=FunctionTransformer(np.log1p)
    demand_values=demand_values.reshape(len(demand_values),1)
    scaler_fit=scaler.fit(demand_values)
    demand_nor=scaler.transform(demand_values)
    index=d_it.index
    #demand_nor=demand_nor.reshape(len(demand_values),1)
    demand_ft=demand_nor.flatten()
    demand_ft=pd.Series(demand_ft,index=index)
    arima_model(demand_ft)
Sales
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 4
AIC = 231.8437265495145
RSME =  0.7148439018027765
_____________________________________________________________________
Information Technology
Best values of p, d & q for ARIMA model are p = 3, d = 0 & q = 2
AIC = 215.51863827801805
RSME =  0.7629563926504035
_____________________________________________________________________
Business Consulting and Management
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 224.26692473555897
RSME =  1.2847689880753057
_____________________________________________________________________
Retail
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 189.36378347066906
RSME =  0.61873598605622
_____________________________________________________________________
Marketing, Advertising and PR
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 1
AIC = 174.64675909478186
RSME =  1.8144652751069068
_____________________________________________________________________
Teacher Training and Education
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 210.8162184950262
RSME =  1.072096493637946
_____________________________________________________________________
Accounting, Finance and Banking
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 3
AIC = 174.1924718707453
RSME =  0.44459836457005747
_____________________________________________________________________
Engineering and Manufacturing
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 172.2666903720641
RSME =  0.9777230672866583
_____________________________________________________________________
Healthcare
Best values of p, d & q for ARIMA model are p = 1, d = 0 & q = 0
AIC = 143.49399495584723
RSME =  0.9014863873414338
_____________________________________________________________________
Media and Internet
Best values of p, d & q for ARIMA model are p = 2, d = 0 & q = 0
AIC = 86.74400102322667
RSME =  0.7533452598115619
_____________________________________________________________________
Recruitment and HR
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 2
AIC = 93.28397262097661
RSME =  2.4267072748888525
_____________________________________________________________________
Transport and Logistics
Best values of p, d & q for ARIMA model are p = 4, d = 0 & q = 3
AIC = 83.38421590016482
RSME =  1.2421105328532664
_____________________________________________________________________
Property and Consultation
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 1
AIC = 64.15130645530935
RSME =  0.7479497244769953
_____________________________________________________________________
Public Service and Administration
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 27.10358146179016
RSME =  0.3800128144943346
_____________________________________________________________________
Hospitality and Event Management
Best values of p, d & q for ARIMA model are p = 0, d = 0 & q = 0
AIC = 17.877978900486056
RSME =  0.361616576752322
_____________________________________________________________________

Result Analysis

In [118]:
for i in sec.index[0:15]:
    dtsl=[]
    dts=pd.read_excel('DataTransformation_Sector/'+i+'.xlsx')
    print(i,'\n')
    dts.replace(u'\xa0',u'', regex=True, inplace=True)
    dts=dts[(dts['P-VALUE']<= 0.05)&(dts['CRITICAL VALUE(1%)']>dts['TEST STATISTIC'])&(dts['CRITICAL VALUE(5%)']>dts['TEST STATISTIC'])]
    dts=dts.sort_values('P-VALUE')
    dtsl=dts['TRANSFORMATION'][0:5].values
    j=1
    for i in dtsl:
        print(str(j)+'. '+i)
        j+=1
    print("___________________________________________________________________________________________n")
    
Sales 

1. Differencing Transformation (Shift 1)
2. Quantile Transformation ( Normal )
3. Log Transformation
4. MaxMinScaler Transfromation (Normalization)
5. StandardScaler Transformation (Standardization)
___________________________________________________________________________________________n
Information Technology 

1. Differencing Transformation (Shift 3)
2. MaxMinScaler Transfromation (Normalization)
3. StandardScaler Transformation (Standardization)
4. MaxAbsScaler Transformation
5. Robust Scaler Transformation
___________________________________________________________________________________________n
Business Consulting and Management 

1. Quantile Transformation ( Normal )
2. MaxMinScaler Transfromation (Normalization)
3. StandardScaler Transformation (Standardization)
4. MaxAbsScaler Transformation
5. Robust Scaler Transformation
___________________________________________________________________________________________n
Retail 

1. MaxMinScaler Transfromation (Normalization)
2. Quantile Transformation ( Normal )
3. StandardScaler Transformation (Standardization)
4. MaxAbsScaler Transformation
5. Robust Scaler Transformation
___________________________________________________________________________________________n
Marketing, Advertising and PR 

1. Differencing Transformation (Shift 1)
2. MaxMinScaler Transfromation (Normalization)
3. StandardScaler Transformation (Standardization)
4. MaxAbsScaler Transformation
5. Robust Scaler Transformation
___________________________________________________________________________________________n
Teacher Training and Education 

1. MaxMinScaler Transfromation (Normalization)
2. StandardScaler Transformation (Standardization)
3. MaxAbsScaler Transformation
4. Robust Scaler Transformation
5. Square Root Transformation
___________________________________________________________________________________________n
Accounting, Finance and Banking 

1. Differencing Transformation (Shift 1)
2. Quantile Transformation ( Normal )
3. Quantile Transformation ( Uniform )
4. Power Transformation ( Box-Cox )
5. Log Transformation
___________________________________________________________________________________________n
Engineering and Manufacturing 

1. MaxMinScaler Transfromation (Normalization)
2. StandardScaler Transformation (Standardization)
3. MaxAbsScaler Transformation
4. Robust Scaler Transformation
5. Quantile Transformation ( Normal )
___________________________________________________________________________________________n
Healthcare 

1. MaxMinScaler Transfromation (Normalization)
2. StandardScaler Transformation (Standardization)
3. MaxAbsScaler Transformation
4. Robust Scaler Transformation
5. Differencing Transformation (Shift 1)
___________________________________________________________________________________________n
Media and Internet 

1. Quantile Transformation ( Normal )
2. Power Transformation ( Box-Cox )
3. Quantile Transformation ( Uniform )
4. Log Transformation
5. Power Transformation ( Yeo-Johnson )
___________________________________________________________________________________________n
Recruitment and HR 

1. MaxMinScaler Transfromation (Normalization)
2. StandardScaler Transformation (Standardization)
3. MaxAbsScaler Transformation
4. Robust Scaler Transformation
5. Quantile Transformation ( Normal )
___________________________________________________________________________________________n
Transport and Logistics 

1. Differencing Transformation (Shift 1)
2. Quantile Transformation ( Normal )
3. Function Transformation
4. Cube Root Transfromation
5. Square Root Transformation
___________________________________________________________________________________________n
Property and Consultation 

1. Quantile Transformation ( Uniform )
2. Power Transformation ( Box-Cox )
3. Power Transformation ( Yeo-Johnson )
4. Quantile Transformation ( Normal )
5. Differencing Transformation (Shift 3)
___________________________________________________________________________________________n
Public Service and Administration 

1. Differencing Transformation (Shift 2)
2. Quantile Transformation ( Normal )
3. Quantile Transformation ( Uniform )
4. Power Transformation ( Box-Cox )
5. Power Transformation ( Yeo-Johnson )
___________________________________________________________________________________________n
Hospitality and Event Management 

1. Differencing Transformation (Shift 1)
2. Quantile Transformation ( Normal )
3. Log Transformation
4. Cube Root Transfromation
5. Function Transformation
___________________________________________________________________________________________n
In [127]:
for i in sec.index[0:15]:
    dtsl=[]
    dts=pd.read_excel('DataTransformation_Sector/'+i+'.xlsx')
    print(i)
    print("------------------------------------\n")
    dts.replace(u'\xa0',u'', regex=True, inplace=True)
    dtsl=dts[(dts['P-VALUE']<= 0.05)&(dts['CRITICAL VALUE(1%)']>dts['TEST STATISTIC'])&(dts['CRITICAL VALUE(5%)']>dts['TEST STATISTIC'])&(dts['TRANSFORMATION']=='MaxAbsScaler Transformation')]
    print(dtsl.values)
    print("___________________________________________________________________________________________\n")
Sales
------------------------------------

[[11 'MaxAbsScaler Transformation' -6.358995 -3.492996 -2.888955
  2.501454e-08]]
___________________________________________________________________________________________

Information Technology
------------------------------------

[[11 'MaxAbsScaler Transformation' -6.455822 -3.496818 -2.890611
  1.484382e-08]]
___________________________________________________________________________________________

Business Consulting and Management
------------------------------------

[[11 'MaxAbsScaler Transformation' -7.549097 -3.496818 -2.890611
  3.224296e-11]]
___________________________________________________________________________________________

Retail
------------------------------------

[[11 'MaxAbsScaler Transformation' -3.920508 -3.496818 -2.890611 0.001889]]
___________________________________________________________________________________________

Marketing, Advertising and PR
------------------------------------

[[11 'MaxAbsScaler Transformation' -5.852893 -3.501912 -2.892815
  3.555029e-07]]
___________________________________________________________________________________________

Teacher Training and Education
------------------------------------

[[11 'MaxAbsScaler Transformation' -9.778959 -3.507853 -2.895382
  6.814323e-17]]
___________________________________________________________________________________________

Accounting, Finance and Banking
------------------------------------

[[11 'MaxAbsScaler Transformation' -4.837813 -3.499637 -2.891831 4.6e-05]]
___________________________________________________________________________________________

Engineering and Manufacturing
------------------------------------

[[11 'MaxAbsScaler Transformation' -8.544093 -3.510712 -2.896616
  9.60719e-14]]
___________________________________________________________________________________________

Healthcare
------------------------------------

[[11 'MaxAbsScaler Transformation' -8.476088 -3.512738 -2.89749
  1.434169e-13]]
___________________________________________________________________________________________

Media and Internet
------------------------------------

[[11 'MaxAbsScaler Transformation' -7.826999 -3.520713 -2.900925
  6.441844e-12]]
___________________________________________________________________________________________

Recruitment and HR
------------------------------------

[[11 'MaxAbsScaler Transformation' -7.425173 -3.531955 -2.905755
  6.577578e-11]]
___________________________________________________________________________________________

Transport and Logistics
------------------------------------

[[11 'MaxAbsScaler Transformation' -5.172061 -3.542413 -2.910236 1e-05]]
___________________________________________________________________________________________

Property and Consultation
------------------------------------

[]
___________________________________________________________________________________________

Public Service and Administration
------------------------------------

[]
___________________________________________________________________________________________

Hospitality and Event Management
------------------------------------

[[11 'MaxAbsScaler Transformation' -5.169368 -3.632743 -2.94851 1e-05]]
___________________________________________________________________________________________

In [144]:
for i in sec.index[0:15]:
    dtsl=[]
    dts=pd.read_excel('DataTransformation_Sector/'+i+'.xlsx')
    print(i)
    print("------------------------------------\n")
    dts.replace(u'\xa0',u'', regex=True, inplace=True)
    dtsl=dts[(dts['P-VALUE']<= 0.05)&((dts['CRITICAL VALUE(1%)']>dts['TEST STATISTIC'])|(dts['CRITICAL VALUE(5%)']>dts['TEST STATISTIC']))&(dts['TRANSFORMATION']=='Cube Root Transfromation')]
    print(dtsl.values)
    print("___________________________________________________________________________________________\n")
Sales
------------------------------------

[[5 'Cube Root Transfromation' -6.148129 -3.492996 -2.888955 7.678167e-08]]
___________________________________________________________________________________________

Information Technology
------------------------------------

[[5 'Cube Root Transfromation' -5.898106 -3.496818 -2.890611 2.819881e-07]]
___________________________________________________________________________________________

Business Consulting and Management
------------------------------------

[[5 'Cube Root Transfromation' -6.991222 -3.496818 -2.890611 7.73596e-10]]
___________________________________________________________________________________________

Retail
------------------------------------

[[5 'Cube Root Transfromation' -3.716545 -3.496818 -2.890611 0.003886]]
___________________________________________________________________________________________

Marketing, Advertising and PR
------------------------------------

[[5 'Cube Root Transfromation' -4.270328 -3.502705 -2.893158 0.0005]]
___________________________________________________________________________________________

Teacher Training and Education
------------------------------------

[[5 'Cube Root Transfromation' -9.210272 -3.507853 -2.895382 1.896004e-15]]
___________________________________________________________________________________________

Accounting, Finance and Banking
------------------------------------

[[5 'Cube Root Transfromation' -5.775922 -3.499637 -2.891831 5.259927e-07]]
___________________________________________________________________________________________

Engineering and Manufacturing
------------------------------------

[[5 'Cube Root Transfromation' -7.276472 -3.510712 -2.896616 1.539766e-10]]
___________________________________________________________________________________________

Healthcare
------------------------------------

[[5 'Cube Root Transfromation' -7.153826 -3.512738 -2.89749 3.091522e-10]]
___________________________________________________________________________________________

Media and Internet
------------------------------------

[[5 'Cube Root Transfromation' -8.43037 -3.520713 -2.900925 1.877267e-13]]
___________________________________________________________________________________________

Recruitment and HR
------------------------------------

[[5 'Cube Root Transfromation' -7.017947 -3.531955 -2.905755 6.657201e-10]]
___________________________________________________________________________________________

Transport and Logistics
------------------------------------

[[5 'Cube Root Transfromation' -5.311113 -3.542413 -2.910236 5e-06]]
___________________________________________________________________________________________

Property and Consultation
------------------------------------

[[5 'Cube Root Transfromation' -3.48167 -3.688926 -2.971989 0.008471]]
___________________________________________________________________________________________

Public Service and Administration
------------------------------------

[[5 'Cube Root Transfromation' -5.494984 -3.584829 -2.928299 2e-06]]
___________________________________________________________________________________________

Hospitality and Event Management
------------------------------------

[[5 'Cube Root Transfromation' -5.215911 -3.632743 -2.94851 8e-06]]
___________________________________________________________________________________________

Creating a ARIMA model for final prediction

In [181]:
sect_abs=['Sales', 'Information Technology', 'Business Consulting and Management',
       'Retail', 'Marketing, Advertising and PR',
       'Teacher Training and Education', 'Accounting, Finance and Banking',
       'Engineering and Manufacturing', 'Healthcare', 'Media and Internet',
       'Recruitment and HR', 'Transport and Logistics',
       'Hospitality and Event Management']
sect_cbrt=['Property and Consultation','Public Service and Administration']
sect_abs_pdq=[(3,0,2),(1,0,0),(1,0,0),(1,0,1),(1,0,0),(0,0,0),(1,0,0),(0,0,0),(0,0,0),(0,0,0),(0,0,0),(0,0,1),(0,0,0)]
sect_cbrt_pdq=[(0,0,1),(0,0,0)]

Sector wise Prediction

1. Sales Prediction

In [224]:
it=df1[df1['Sector']==sect_abs[0]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[0]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[0])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Sales Prediction
AIC = -37.01350309298522
In [220]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[220]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a8e6550>

2. Information Technology

In [240]:
it=df1[df1['Sector']==sect_abs[1]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[1]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[1])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Information Technology Prediction
In [238]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[238]:
<matplotlib.axes._subplots.AxesSubplot at 0x20198c071d0>

3. Business Consulting and Management

In [241]:
it=df1[df1['Sector']==sect_abs[2]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[2]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[2])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Business Consulting and Management Prediction
In [242]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[242]:
<matplotlib.axes._subplots.AxesSubplot at 0x20197be4e48>

4. Retail

In [243]:
it=df1[df1['Sector']==sect_abs[3]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[3]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[3])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Retail Prediction
In [244]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[244]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019aa32630>

5. Marketing, Advertising and PR

In [245]:
it=df1[df1['Sector']==sect_abs[4]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[4]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[4])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Marketing, Advertising and PR Prediction
In [246]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[246]:
<matplotlib.axes._subplots.AxesSubplot at 0x20198bf0908>

6. Teacher Training and Education

In [247]:
it=df1[df1['Sector']==sect_abs[5]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[5]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[5])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Teacher Training and Education Prediction
In [248]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[248]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019aad1b38>

7. Accounting, Finance and Banking

In [249]:
it=df1[df1['Sector']==sect_abs[6]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[6]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[6])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Accounting, Finance and Banking Prediction
In [250]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[250]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a711eb8>

8. Engineering and Manufacturing

In [251]:
it=df1[df1['Sector']==sect_abs[7]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[7]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[7])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Engineering and Manufacturing Prediction
In [252]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[252]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a857da0>

9. Healthcare

In [253]:
it=df1[df1['Sector']==sect_abs[8]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[8]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[8])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Healthcare Prediction
In [254]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[254]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a04aa20>

10. Media and Internet

In [255]:
it=df1[df1['Sector']==sect_abs[9]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[9]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[9])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic))
Media and Internet Prediction
In [256]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[256]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a1792e8>

11. Recruitment and HR

In [258]:
it=df1[df1['Sector']==sect_abs[10]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[10]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[10])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic)10
Recruitment and HR Prediction
In [259]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[259]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019a24d668>

12. Transport and Logistics

In [261]:
it=df1[df1['Sector']==sect_abs[11]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[11]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[11])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic)10
Transport and Logistics Prediction
In [262]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[262]:
<matplotlib.axes._subplots.AxesSubplot at 0x20198b98048>

14. Public Service and Administration

In [235]:
it=df1[df1['Sector']==sect_cbrt[1]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_cbrt[1]+" Prediction")
demand_cbrt=np.cbrt(d_it)
demand_model=ARIMA(demand_cbrt,order=sect_cbrt_pdq[1])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=114)[0]
fig, ax = plt.subplots()
ax=demand_model_fit.plot_predict(1,190,ax=ax)
print("AIC = "+str(demand_model_fit.aic))
Public Service and Administration Prediction
AIC = -13.949782686634101
In [236]:
demand_fore=np.power(demand_forecast,3)
index=pd.date_range(start='04/10/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[236]:
<matplotlib.axes._subplots.AxesSubplot at 0x20199ff4898>

15. Property and Consultation

In [263]:
it=df1[df1['Sector']==sect_abs[12]]
d_it=it.groupby('Posted_Date')['Posted_Date'].count()
print(sect_abs[12]+" Prediction")
demand_values=d_it.values
scaler=MaxAbsScaler()
demand_values=demand_values.reshape(len(demand_values),1)
scaler_fit=scaler.fit(demand_values)
demand_nor=scaler.transform(demand_values)
index=d_it.index
demand_abs=demand_nor.flatten()
demand_abs=pd.Series(demand_abs,index=index)
demand_model=ARIMA(demand_abs,order=sect_abs_pdq[12])
demand_model_fit=demand_model.fit()
demand_forecast=demand_model_fit.forecast(steps=112)[0]
fig, ax = plt.subplots()
demamd_f=demand_forecast
ax=demand_model_fit.plot_predict(1,190,ax=ax)
#print("AIC = "+str(demand_model_fit.aic)10
Hospitality and Event Management Prediction
In [264]:
demamd_re=demamd_f.reshape(len(demamd_f),1)
demand_fore=scaler.inverse_transform(demamd_re)
index=pd.date_range(start='04/12/2020', end='08/01/2020') # mm/dd/yyyy
demand_fore=demand_fore.flatten()
demand_prediction=pd.Series(demand_fore,index=index)
d_it.plot(figsize=(15,10))
demand_prediction.plot(figsize=(10,6))
Out[264]:
<matplotlib.axes._subplots.AxesSubplot at 0x2019c970d68>
In [ ]:
14